library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
# load in income data - code adapted from other students
bay_median_income_by_block <-
pullCensus("B19013_001E", bay_area_counties) %>%
filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
rename(
Median_Income = B19013_001E
) %>%
filter(!is.na(Median_Income)) %>%
left_join(bay_sd_at_home_average, by = c("blockgroup" = "origin_census_block_group")) %>%
filter(!is.na(device_count))
bay_ami_by_block <-
pullCensus("group(B19001)", bay_area_counties) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
group_by(blockgroup) %>%
summarize(
Total = B19001_001E,
`Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
#sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
`Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E)
) %>%
mutate(
`% under 75,000` = `Under 75,000` / Total * 100,
`% under 100,000` = `Under 100,000` / Total * 100
) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)
) %>%
filter(!is.na(device_count))
# plotting
bay_ami_by_block %>%
ggplot(aes(
x = `% under 75,000`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes under $75,000 annually",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Below $75,000"
)
income_75_model <- lm(`% Completely at Home` ~ `% under 75,000`, bay_ami_by_block)
summary(income_75_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `% under 75,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.615 -4.965 0.576 5.531 25.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.615083 0.281849 186.68 <2e-16 ***
## `% under 75,000` -0.152449 0.006425 -23.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.538 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1064, Adjusted R-squared: 0.1062
## F-statistic: 563 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>%
ggplot(aes(
x = `% under 100,000`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes under $100,000 annually",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Below $100,000"
)
income_100_model <- lm(`% Completely at Home` ~ `% under 100,000`, bay_ami_by_block)
summary(income_100_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `% under 100,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.543 -4.765 0.590 5.545 23.900
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.542971 0.326103 167.26 <2e-16 ***
## `% under 100,000` -0.155785 0.005934 -26.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.438 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1272, Adjusted R-squared: 0.127
## F-statistic: 689.2 on 1 and 4728 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_ami_by_block %>%
ggplot(aes(
x = `% under 75,000`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes under $75,000 annually",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Households Below $75,000 Pre Shelter-in-Place"
)
income_75_model2 <- lm(`% Completely at Home Pre Shelter` ~ `% under 75,000`, bay_ami_by_block)
summary(income_75_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `% under 75,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.435 -3.290 -0.245 2.942 30.095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.730991 0.165171 107.35 <2e-16 ***
## `% under 75,000` 0.113036 0.003765 30.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.003 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1601, Adjusted R-squared: 0.1599
## F-statistic: 901.3 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>%
ggplot(aes(
x = `% under 100,000`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes under $100,000 annually",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Households Below $100,000 Pre Shelter-in-Place"
)
income_100_model2 <- lm(`% Completely at Home Pre Shelter` ~ `% under 100,000`, bay_ami_by_block)
summary(income_100_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `% under 100,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.9262 -3.3078 -0.3057 2.9109 30.2334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.639978 0.192226 86.56 <2e-16 ***
## `% under 100,000` 0.108863 0.003498 31.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.974 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.17, Adjusted R-squared: 0.1699
## F-statistic: 968.6 on 1 and 4728 DF, p-value: < 2.2e-16
# loading in language data - code adapted from other students
bay_lang_by_block <-
pullCensus("group(B16004)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
left_join(acs_vars, by = c("variable" = "name")) %>%
mutate(
tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
) %>%
filter(tier %in% c('Speak English "not well"',
'Speak English "not at all"',
'Total', 'Speak Spanish',
'Speak Asian and Pacific Island languages')) %>%
group_by(blockgroup, tier) %>%
summarise(
estimate1 = sum(estimate)
) %>%
spread(
key = "tier",
value = "estimate1"
) %>%
mutate(
`% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
`% speaking spanish` = (`Speak Spanish`/ Total) * 100,
`% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count)) %>%
mutate(log_perc = log(`% speaking english < well`))
# plotting
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking english < well`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English less than well",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and English Language Ability"
)
english_ability_model <- lm(`% Completely at Home` ~ `% speaking english < well`, bay_lang_by_block)
summary(english_ability_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `% speaking english < well`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.028 -5.520 0.373 5.825 28.695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.633533 0.177956 262.052 <2e-16 ***
## `% speaking english < well` -0.007336 0.015542 -0.472 0.637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.081 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 4.706e-05, Adjusted R-squared: -0.0001642
## F-statistic: 0.2228 on 1 and 4734 DF, p-value: 0.6369
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking spanish`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking Spanish",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Spanish Language Ability"
)
spanish_speaking_model <- lm(`% Completely at Home` ~ `% speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `% speaking spanish`, data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.864 -5.124 0.483 5.715 28.442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.457820 0.174164 278.2 <2e-16 ***
## `% speaking spanish` -0.117490 0.007343 -16.0 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.845 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.05131, Adjusted R-squared: 0.05111
## F-statistic: 256 on 1 and 4734 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking english < well`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English less than well",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and English Language Ability Pre Shelter-in-Place"
)
english_ability_model2 <- lm(`% Completely at Home Pre Shelter` ~ `% speaking english < well`, bay_lang_by_block)
summary(english_ability_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `% speaking english < well`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.914 -3.676 -0.362 3.136 33.866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.855961 0.103496 201.51 <2e-16 ***
## `% speaking english < well` 0.173148 0.009037 19.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.279 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.07199, Adjusted R-squared: 0.0718
## F-statistic: 367.1 on 1 and 4732 DF, p-value: < 2.2e-16
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking spanish`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking Spanish",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
)
spanish_speaking_model2 <- lm(`% Completely at Home Pre Shelter` ~ `% speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `% speaking spanish`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.869 -3.603 -0.413 3.159 32.848
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.853569 0.104039 200.44 <2e-16 ***
## `% speaking spanish` 0.083275 0.004386 18.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.283 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.0708, Adjusted R-squared: 0.07061
## F-statistic: 360.6 on 1 and 4732 DF, p-value: < 2.2e-16
# loading in age data - specifically looking at percentage 65+ and percentage <30
bay_age_by_block <-
pullCensus("group(B01001)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
select(-variable) %>%
separate(
label,
into = c(NA,NA,"sex","age"),
sep = "!!"
) %>% filter(!is.na(age)) %>%
mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>%
group_by(blockgroup) %>%
summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T)) %>%
mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count))
# plotting
bay_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Young Age Groups"
)
young_model <- lm(bay_age_by_block$`% Completely at Home` ~ bay_age_by_block$`percent less than 30`)
summary(young_model)
##
## Call:
## lm(formula = bay_age_by_block$`% Completely at Home` ~ bay_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.096 -5.313 0.292 5.739 30.226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.85035 0.46160 110.161 <2e-16
## bay_age_by_block$`percent less than 30` -0.12054 0.01249 -9.652 <2e-16
##
## (Intercept) ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.993 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.0193, Adjusted R-squared: 0.01909
## F-statistic: 93.16 on 1 and 4734 DF, p-value: < 2.2e-16
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents ages 65 and older",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Elderly Population"
)
elderly_model <- lm(`% Completely at Home` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `percent elderly`, data = bay_age_by_block %>%
## filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.229 -5.505 0.352 5.766 29.349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.79736 0.28066 163.178 < 2e-16 ***
## `percent elderly` 0.05361 0.01621 3.308 0.000946 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.05 on 4693 degrees of freedom
## Multiple R-squared: 0.002327, Adjusted R-squared: 0.002114
## F-statistic: 10.94 on 1 and 4693 DF, p-value: 0.0009461
Compare to pre-shelter-in-place behavior:
bay_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Young Age Groups Pre Shelter-in-Place"
)
young_model2 <- lm(bay_age_by_block$`% Completely at Home Pre Shelter` ~ bay_age_by_block$`percent less than 30`)
summary(young_model2)
##
## Call:
## lm(formula = bay_age_by_block$`% Completely at Home Pre Shelter` ~
## bay_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.296 -3.749 -0.283 3.332 33.649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.226471 0.281877 75.304 < 2e-16
## bay_age_by_block$`percent less than 30` 0.027096 0.007631 3.551 0.000388
##
## (Intercept) ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.473 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.002657, Adjusted R-squared: 0.002447
## F-statistic: 12.61 on 1 and 4732 DF, p-value: 0.0003878
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents ages 65 and older",
y = "Percent devices completely at home onw eekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Elderly Population Pre Shelter-in-Place"
)
elderly_model2 <- lm(`% Completely at Home Pre Shelter` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `percent elderly`,
## data = bay_age_by_block %>% filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.810 -3.681 -0.331 3.316 32.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.453206 0.167597 139.938 <2e-16 ***
## `percent elderly` -0.085606 0.009675 -8.848 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.399 on 4691 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01642, Adjusted R-squared: 0.01621
## F-statistic: 78.29 on 1 and 4691 DF, p-value: < 2.2e-16
# also get data on vehicles available as households without a vehicle
bay_no_vehicles_by_block <- pullCensus("group(B25044)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>%
filter(!is.na(vehicles)) %>%
group_by(blockgroup, vehicles) %>%
summarize(grouped_vehicles = sum(estimate)) %>%
spread(key = vehicles, value = grouped_vehicles) %>%
mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `log percent` = log(`percent no vehicles`)) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_no_vehicles_by_block %>%
ggplot(aes(
x = `percent no vehicles`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with no vehicle available",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Vehicle Availability"
)
vehicles_model <- lm(`% Completely at Home` ~ `percent no vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `percent no vehicles`,
## data = bay_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.150 -5.519 0.323 5.838 28.323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.97701 0.15808 297.166 < 2e-16 ***
## `percent no vehicles` -0.04342 0.01048 -4.143 3.49e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.016 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.003617, Adjusted R-squared: 0.003406
## F-statistic: 17.16 on 1 and 4728 DF, p-value: 3.491e-05
Compare to pre-shelter-in-place behavior:
bay_no_vehicles_by_block %>%
ggplot(aes(
x = `percent no vehicles`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with no vehicle available",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Vehicle Availability Pre Shelter-in-Place"
)
vehicles_model2 <- lm(`% Completely at Home Pre Shelter` ~ `percent no vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `percent no vehicles`,
## data = bay_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.1954 -3.5354 -0.2145 3.2767 30.3199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.180091 0.092106 229.95 <2e-16 ***
## `percent no vehicles` 0.118945 0.006106 19.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.253 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.07429, Adjusted R-squared: 0.0741
## F-statistic: 379.5 on 1 and 4728 DF, p-value: < 2.2e-16
# get data on occupants per room
bay_occupants_per_room_by_block <- pullCensus("group(B25014)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>%
filter(!is.na(`occupants per room`)) %>%
group_by(blockgroup, `occupants per room`) %>%
summarize(estimate_tot = sum(estimate)) %>%
spread(key = `occupants per room`, value = estimate_tot) %>%
mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent 1 or more`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with more than 1 occupant per room",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Room Occupancy"
)
occupants_model <- lm(`% Completely at Home` ~ `percent 1 or more`, bay_occupants_per_room_by_block)
summary(occupants_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `percent 1 or more`, data = bay_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.191 -5.440 0.355 5.746 28.102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.19777 0.16630 283.812 < 2e-16 ***
## `percent 1 or more` -0.08743 0.01529 -5.719 1.14e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.001 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.00687, Adjusted R-squared: 0.00666
## F-statistic: 32.71 on 1 and 4728 DF, p-value: 1.138e-08
Compare to pre-shelter-in-place behavior:
bay_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent 1 or more`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with more than 1 occupant per room",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Room Occupancy Pre Shelter-in-Place"
)
occupants_model2 <- lm(`% Completely at Home Pre Shelter` ~ `percent 1 or more`, bay_occupants_per_room_by_block)
summary(occupants_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `percent 1 or more`,
## data = bay_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.116 -3.671 -0.316 3.161 30.361
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.138916 0.097823 216.1 <2e-16 ***
## `percent 1 or more` 0.155550 0.008993 17.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.295 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.05951, Adjusted R-squared: 0.05931
## F-statistic: 299.2 on 1 and 4728 DF, p-value: < 2.2e-16
bay_education_by_block <- pullCensus("group(B15003)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "education level"), sep = "!!") %>%
mutate(`education level` = replace_na(`education level`, "total_educ")) %>% # if the education level field is NA, this corresponded to the total number in that blockgroup
spread(key = `education level`, value = estimate) %>%
mutate(`percent associates or higher` = (`Associate's degree` + `Bachelor's degree` + `Doctorate degree` + `Master's degree`)*100/total_educ, `percent less than associates` = 100-`percent associates or higher`) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_education_by_block %>%
ggplot(aes(
x = `percent less than associates`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people without an degree at Associate's level or higher",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Education"
)
educ_model <- lm(`% Completely at Home` ~ `percent less than associates`, bay_education_by_block)
summary(educ_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `percent less than associates`,
## data = bay_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.727 -4.859 0.715 5.691 26.864
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.47890 0.33405 160.09 <2e-16 ***
## `percent less than associates` -0.13917 0.00625 -22.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.628 on 4733 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.09482, Adjusted R-squared: 0.09463
## F-statistic: 495.8 on 1 and 4733 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_education_by_block %>%
ggplot(aes(
x = `percent less than associates`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people without an degree at Associate's level or higher",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Education Pre Shelter-in-Place"
)
educ_model2 <- lm(`% Completely at Home Pre Shelter` ~ `percent less than associates`, bay_education_by_block)
summary(educ_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `percent less than associates`,
## data = bay_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3499 -3.4893 -0.4311 3.0081 31.2636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.595763 0.199664 88.13 <2e-16 ***
## `percent less than associates` 0.092690 0.003737 24.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.155 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.1151, Adjusted R-squared: 0.1149
## F-statistic: 615.3 on 1 and 4732 DF, p-value: < 2.2e-16
Motivated by this paper https://www.nber.org/papers/w26982.pdf on social distancing, internet access, and inequality, we look at whether a household has “Broadband (high-speed) Internet service such as cable, fiber optic, or DSL service,” and staying at home.
bay_internet_by_block <- pullCensus("group(B28002)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>%
filter(is.na(subscription) | (type == "Broadband such as cable, fiber optic or DSL") & is.na(additional)) %>%
mutate(type = replace_na(type, "total_num")) %>%
dplyr::select(blockgroup, type, estimate) %>%
spread(key = type, value = estimate) %>%
mutate(`percent high speed` = `Broadband such as cable, fiber optic or DSL`*100/total_num, `percent no high speed` = 100-`percent high speed`) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_internet_by_block %>%
ggplot(aes(
x = `percent no high speed`,
y = `% Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households without broadband such as cable, fiber optic or DSL",
y = "Percent devices completely at home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and High Speed Internet"
)
internet_model <- lm(`% Completely at Home` ~ `percent no high speed`, bay_internet_by_block)
summary(internet_model)
##
## Call:
## lm(formula = `% Completely at Home` ~ `percent no high speed`,
## data = bay_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.141 -5.086 0.442 5.622 28.112
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.656273 0.228034 222.14 <2e-16 ***
## `percent no high speed` -0.196780 0.009262 -21.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.63 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.08715, Adjusted R-squared: 0.08695
## F-statistic: 451.4 on 1 and 4728 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_internet_by_block %>%
ggplot(aes(
x = `percent no high speed`,
y = `% Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households without broadband such as cable, fiber optic or DSL",
y = "Percent devices completely at home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and High Speed Internet Pre Shelter-in-Place"
)
internet_model2 <- lm(`% Completely at Home Pre Shelter` ~ `percent no high speed`, bay_internet_by_block)
summary(internet_model2)
##
## Call:
## lm(formula = `% Completely at Home Pre Shelter` ~ `percent no high speed`,
## data = bay_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.8729 -3.5344 -0.1585 3.1107 29.3893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.52334 0.13663 142.89 <2e-16 ***
## `percent no high speed` 0.12937 0.00555 23.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.171 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1031, Adjusted R-squared: 0.1029
## F-statistic: 543.4 on 1 and 4728 DF, p-value: < 2.2e-16
Multiple regression analysis with income, education, and internet
# multiple regression
modeltest <- lm(bay_ami_by_block$`% Completely at Home` ~ bay_ami_by_block$`% under 100,000` + bay_education_by_block$`percent less than associates` + bay_internet_by_block$`percent no high speed`)
summary(modeltest)
##
## Call:
## lm(formula = bay_ami_by_block$`% Completely at Home` ~ bay_ami_by_block$`% under 100,000` +
## bay_education_by_block$`percent less than associates` + bay_internet_by_block$`percent no high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.421 -4.717 0.644 5.622 25.069
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 55.091416 0.350698
## bay_ami_by_block$`% under 100,000` -0.102925 0.008964
## bay_education_by_block$`percent less than associates` -0.038937 0.008728
## bay_internet_by_block$`percent no high speed` -0.063835 0.012003
## t value Pr(>|t|)
## (Intercept) 157.091 < 2e-16 ***
## bay_ami_by_block$`% under 100,000` -11.481 < 2e-16 ***
## bay_education_by_block$`percent less than associates` -4.461 8.34e-06 ***
## bay_internet_by_block$`percent no high speed` -5.318 1.09e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.384 on 4726 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1387, Adjusted R-squared: 0.1382
## F-statistic: 253.8 on 3 and 4726 DF, p-value: < 2.2e-16